介绍ggplot2 对于图层的理解 data, aes,geom
library(ggplot2)
# Inspect the structure (columns, types, first values) of the mtcars data set.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
介绍语法(Grammar of Graphics)
data, aesthetics, and geom layers
mpg (miles per gallon), wt (weight, in thousands of pounds), disp (displacement of the car engine) 在一幅图里展示多个变量 # A scatter plot has been made for you
# Scatter plot of weight (wt) against fuel economy (mpg).
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

# Same scatter plot, with disp additionally mapped to point colour.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point()

# Same scatter plot, with disp mapped to point size instead.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point()
有没有发现图的边上还增加了图例(legend)?aes里面除了color、size,还有shape,但是shape不能用在这里(disp是连续变量,而shape只能映射离散变量)
介绍ggplot语法: data,aesthetic,geometries,介绍qplot,statistics,坐标系和分面,themes(本次课程介绍到qplot)
# Inspect the structure of the diamonds data set used in the next plots.
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Points only: carat against price.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

# Points plus a smoothed trend layer.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth()

# Smoothed trend layer only.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_smooth()

# One smoothed trend per clarity level, distinguished by colour.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_smooth()
alpha调节的是透明度
# Coloured points with alpha set to 0.4 on the point layer.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point(alpha = 0.4)

# Keep the data and the x/y mapping in an object and add layers to it later.
dia_plot <- ggplot(data = diamonds, mapping = aes(x = carat, y = price))

# Plain point layer on top of the stored plot.
dia_plot +
  geom_point()

# Point layer that carries its own colour mapping.
dia_plot +
  geom_point(mapping = aes(color = clarity))
# Base-graphics scatter plot of weight vs. mpg, coloured by the cyl column.
plot(mtcars$wt, mtcars$mpg, col = mtcars$cyl)
# Store a factor copy of cyl in a new column, fcyl, and colour by that instead.
mtcars$fcyl <- as.factor(mtcars$cyl)
plot(mtcars$wt, mtcars$mpg, col = mtcars$fcyl)
# Fit a linear model of mpg on wt using all rows of mtcars.
carModel <- lm(mpg ~ wt, data = mtcars)
# Overwrite cyl itself with its factor version; later code reads mtcars$cyl.
mtcars$cyl <- as.factor(mtcars$cyl)
plot(mtcars$wt, mtcars$mpg, col = mtcars$cyl)
# Add the fitted line from carModel to the current plot, with lty set to 2.
abline(carModel, lty = 2)
# Plot each subset efficiently with lapply.
# Draw the scatter plot first, then add one regression line per cylinder group.
plot(mtcars$wt, mtcars$mpg, col = mtcars$cyl)
# Iterate over the distinct cyl values only. Looping over mtcars$cyl itself
# refits and redraws a line for every one of the 32 rows, i.e. the same few
# lines many times over. invisible() keeps the list of NULLs returned by
# abline() from being printed.
invisible(lapply(unique(mtcars$cyl), function(x) {
  abline(lm(mpg ~ wt, mtcars, subset = (cyl == x)), col = x)
}))
# Draw the legend on the plot that is currently open; this call cannot be run
# on its own.
legend(
  x = 5,
  y = 33,
  legend = levels(mtcars$cyl),
  col = 1:3,
  pch = 1,
  bty = "n"
)
# Make sure cyl is a factor before it is mapped to colour.
mtcars$cyl <- as.factor(mtcars$cyl)

# Points coloured by cyl.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point()

# Points plus a linear fit (no confidence band) for each cyl colour.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# As above, plus one more linear fit over all points (group = 1), drawn with
# linetype 2.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    se = FALSE,
    linetype = 2
  )
###the group aesthetic will tell ggplot() to draw a single linear model through all the points. ###高下立判!!
# Sepal measurements from the plot-level mapping, then petal measurements as a
# second point layer with its own x/y mapping, drawn in red.
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_point(
    mapping = aes(x = Petal.Length, y = Petal.Width),
    col = "red"
  )
library(tidyverse)

# iris.tidy: long format with one row per Species / Part / Measure value.
# Pipe version.
iris.tidy <- iris %>%
  gather(pm, Value, -Species) %>%
  separate(pm, c("Part", "Measure"), sep = "\\.")
# The same transformation written step by step.
iris.tidy_g <- gather(iris, pm, Value, -Species)
iris.tidy <- separate(iris.tidy_g, pm, c("Part", "Measure"), sep = "\\.")

# Deliberate failure demo: spreading iris.tidy directly errors because many
# rows share the same Species/Part/Measure keys (duplicate rows). The value
# column is named `Value`, so it must be referenced with that spelling for the
# demo to fail for the intended reason. try() reports the error and lets the
# rest of the script run.
try(iris.wide <- iris.tidy %>% spread(Measure, Value))

# Give every flower a unique id so each output row is uniquely identified.
iris$Flower <- seq_len(nrow(iris))
# iris.wide: one row per flower and Part, with Length and Width columns.
# Pipe version.
iris.wide <- iris %>%
  gather(key, value, -Species, -Flower) %>%
  separate(key, c("Part", "Measure"), "\\.") %>%
  spread(Measure, value) %>%
  select(-Flower)
# The same transformation written step by step.
iris.tidy_g <- gather(iris, pm, value, -Species, -Flower)
iris.tidy_s <- separate(iris.tidy_g, pm, c("Part", "Measure"), sep = "\\.")
iris.tidy_sp <- spread(iris.tidy_s, Measure, value)
iris.wide <- select(iris.tidy_sp, -Flower)
# Remove the helper id column from iris again.
iris <- select(iris, -Flower)

# Plot the wide data only now that iris.wide exists.
ggplot(iris.wide, aes(x = Length, y = Width, col = Part)) +
  geom_point()
# Structure of the original iris data.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Structure of iris.wide (Species, Part, Length, Width).
str(iris.wide)
## 'data.frame': 300 obs. of 4 variables:
## $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Part : chr "Petal" "Sepal" "Petal" "Sepal" ...
## $ Length : num 1.4 5.1 1.4 4.9 1.3 4.7 1.5 4.6 1.4 5 ...
## $ Width : num 0.2 3.5 0.2 3 0.2 3.2 0.2 3.1 0.2 3.6 ...
# Structure of iris.tidy (Species, Part, Measure, Value).
str(iris.tidy)
## 'data.frame': 600 obs. of 4 variables:
## $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Part : chr "Sepal" "Sepal" "Sepal" "Sepal" ...
## $ Measure: chr "Length" "Length" "Length" "Length" ...
## $ Value : num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
# Jittered values per species, coloured by Part, with one facet column per
# Measure.
ggplot(
  data = iris.tidy,
  mapping = aes(x = Species, y = Value, col = Part)
) +
  geom_jitter() +
  facet_grid(. ~ Measure)
# Compare the first rows of the three layouts. Each call needs its own line;
# several expressions on one line without a separator is a syntax error.
head(iris)
head(iris.wide)
head(iris.tidy)
# Jittered Length against Width, coloured by Part, with one facet column per
# Species.
ggplot(
  data = iris.wide,
  mapping = aes(x = Length, y = Width, color = Part)
) +
  geom_jitter() +
  facet_grid(. ~ Species)
##可以尝试一下不要分面的感觉(如果感觉分面引入的太快,可以暂时不用放在心上)
在geom中应该叫做属性 All about aesthetics # 1 - Map mpg to x and cyl to y
# 1 - mpg on x, cyl on y.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point()

# 2 - Axes swapped: cyl on x, mpg on y.
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()

# 3 - wt on x, mpg on y, cyl mapped to colour.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point()

# 4 - Same mapping, with shape and size set as fixed attributes of the layer.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(shape = 1, size = 4)
color和fill的区别,geom_point是个例外我们使用color代替fill 1.The default geom_point() uses shape = 19(a solid circle with an outline the same colour as the inside) 2.Good alternatives are shape = 1 (hollow) and shape = 16(solid, no outline) 3.A really nice alternative is shape = 21 which allows you to use both fill for the inside and col for the outline!
# Check the current classes of am, cyl and wt before converting anything.
class(mtcars$am)
## [1] "numeric"
class(mtcars$cyl)
## [1] "factor"
class(mtcars$wt)
## [1] "numeric"
转换成因子,如果不转,也可以,做完了就知道很丑
# Convert am and cyl to factors in one step.
factor_cols <- c("am", "cyl")
mtcars[factor_cols] <- lapply(mtcars[factor_cols], as.factor)

# Re-check the classes after the conversion.
class(mtcars$am)
## [1] "factor"
class(mtcars$cyl)
## [1] "factor"
class(mtcars$wt)
## [1] "numeric"
# cyl mapped to colour; points drawn with shape 1 and size 4.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = cyl)) +
  geom_point(shape = 1, size = 4)

# cyl mapped to fill instead, still with shape 1.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 1, size = 4)

# cyl mapped to fill with shape 21 and alpha 0.6.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 21, size = 4, alpha = 0.6)

# cyl mapped to fill and am mapped to colour, shape 21.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, fill = cyl, col = am)
) +
  geom_point(shape = 21, size = 4, alpha = 0.6)

# cyl mapped to size.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = cyl)) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.

# cyl mapped to alpha.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, alpha = cyl)) +
  geom_point()

# cyl mapped to shape.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, shape = cyl)) +
  geom_point()

# cyl used as the text label, drawn with geom_text().
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, label = cyl)) +
  geom_text()
Shapes in R can have a value from 0-25. Shapes 0-20 can only accept a color aesthetic, but shapes 21-25 have both a color and a fill aesthetic.
# A fixed hexadecimal colour used as a layer attribute below.
my_color <- "#4ABEFF"

# cyl mapped to colour.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = cyl)) +
  geom_point()

# Same mapping, but the point layer sets color to my_color as an attribute.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = cyl)) +
  geom_point(color = my_color)

# cyl mapped to fill; size 10, shape 23 and color my_color set as attributes.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(size = 10, shape = 23, color = my_color)

# Points with alpha set to 0.5.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(alpha = 0.5)

# Points with shape 24 and color "yellow".
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_point(shape = 24, color = "yellow")

# Text layer labelled with the row names of mtcars, in red.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = cyl)) +
  geom_text(label = rownames(mtcars), color = "red")
mpg – Miles/(US) gallon 油耗 cyl – Number of cylinders 气缸数 disp – Displacement (cu.in.) 排量 hp – Gross horsepower 马力 drat – Rear axle ratio 后轴比 wt – Weight (lb/1000) 重量 qsec – 1/4 mile time 1/4英里冲刺 vs – V/S engine. 发动机 am – Transmission (0 = automatic, 1 = manual) 传动 gear – Number of forward gears 前进档数 carb – Number of carburetors 化油器
过犹不及 分清楚aesthetics and attributes Variables in a data frame are mapped to aesthetics in aes(). (e.g. aes(col = cyl)) within ggplot() Visual elements are set by attributes in specific geom layers (geom_point(col = “red”)).
# mpg against qsec, coloured by cyl.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = qsec, col = factor(cyl))
) +
  geom_point()

# Additionally map am to shape.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = qsec, col = factor(cyl), shape = factor(am))
) +
  geom_point()

# Additionally map the ratio hp/wt to size.
ggplot(
  data = mtcars,
  mapping = aes(
    x = mpg,
    y = qsec,
    col = factor(cyl),
    shape = factor(am),
    size = (hp/wt)
  )
) +
  geom_point()
##Modifying Aesthetics: position, jitter, Scale Functions
# Base plot: cylinder count on x, bars filled by transmission type (am).
cyl.am <- ggplot(mtcars, aes(x = factor(cyl), fill = factor(am)))

# Bar chart with the default position.
cyl.am +
  geom_bar()

# Bar chart with position = "fill".
cyl.am +
  geom_bar(position = "fill")

# Bar chart with position = "dodge".
cyl.am +
  geom_bar(position = "dodge")

# Fill colours and legend labels, given in the order of the factor levels of
# am ("0", "1"). am is coded 0 = automatic, 1 = manual, so "Automatic" must
# come first; the previous order labelled the two groups the wrong way round.
val <- c("#E41A1C", "#377EB8")
lab <- c("Automatic", "Manual")

# Dodged bar chart with axis titles and a manual fill scale.
cyl.am +
  geom_bar(position = "dodge") +
  scale_x_discrete("Cylinders") +
  scale_y_continuous("Number") +
  scale_fill_manual("Transmission",
                    values = val,
                    labels = lab)
# mpg plotted against a constant y of 0: a jittered layer plus a plain point
# layer.
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_jitter() +
  geom_point()

# Same plot with the y axis limited to the range -2 to 2.
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_jitter() +
  geom_point() +
  scale_y_continuous(limits = c(-2, 2))
图形是为了更加直观,不是为了更加好看,越用心越难看
# mtcars: points of size 4, coloured by cyl.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = factor(cyl))) +
  geom_point(size = 4)

# Same, drawn with shape 1.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = factor(cyl))) +
  geom_point(size = 4, shape = 1)

# Same, drawn with alpha 0.6 instead of shape 1.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, col = factor(cyl))) +
  geom_point(size = 4, alpha = 0.6)

# diamonds: carat against price, coloured by clarity.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point()

# Adjust for overplotting: the same plot with alpha set to 0.5.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point(alpha = 0.5)

# clarity on x, carat on y, coloured by price.
ggplot(
  data = diamonds,
  mapping = aes(x = clarity, y = carat, color = price)
) +
  geom_point(alpha = 0.5)

# Same plot with position = "jitter" on the point layer.
ggplot(
  data = diamonds,
  mapping = aes(x = clarity, y = carat, color = price)
) +
  geom_point(alpha = 0.5, position = "jitter")
要讲三种,scatter, bar,line ##Scatter plots and jittering (2)目的是为了防止overplotting # Shown in the viewer:
# Starting point: cyl against wt drawn with plain points.
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_point()

# Solutions:
# 1 - With geom_jitter()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_jitter()

# 2 - Set width in geom_jitter()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_jitter(width = 0.1)

# 3 - Set position = position_jitter() in geom_point()
ggplot(data = mtcars, mapping = aes(x = cyl, y = wt)) +
  geom_point(position = position_jitter(width = 0.1))
jitter can be 1) an argument in geom_point(position = ‘jitter’), 2) a geom itself, geom_jitter(), or 3) a position function, position_jitter(0.1)
The Vocab dataset contains information about the years of education and integer score on a vocabulary test for over 21,000 individuals based on US General Social Surveys from 1972-2004. # Vocab数据集在car这个包里面(较新版本的car把数据集放在carData包中,加载car时会一并加载) # install.packages("car") # Examine the structure of Vocab
# Load car, the package the notes name as the source of the Vocab data set.
library(car)
# Examine the structure of Vocab.
str(Vocab)
## 'data.frame': 21638 obs. of 4 variables:
## $ year : int 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 2 1 2 2 1 ...
## $ education : int 9 14 14 17 14 14 12 10 11 9 ...
## $ vocabulary: int 3 6 9 8 1 7 6 6 5 1 ...
# Education against vocabulary score with plain points.
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_point()

# Same data with jittered points.
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter()

# Jittered points with alpha set to 0.2.
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2)

# Jittered points with alpha 0.2 and shape 1.
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2, shape = 1)
histogram 直方图